import os
os.chdir("../")
import sys
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
import numpy as np
import matplotlib.pyplot as plt

# Directory that contains one sub-directory of saved results per sweep
# configuration.  The path is relative, so it is resolved against the current
# working directory of the process.
RESULT_PATH = "results/iql_param_sweep/iql_results/"

# Sub-directory (with trailing slash) of each sweep configuration.
# NOTE(review): the names look like "<learning rate>_<second hyperparameter>"
# with the decimal point dropped -- confirm against the training script.
path_001_50 = "001_50/"
path_001_100 = "001_100/"
path_001_200 = "001_200/"
path_001_500 = "001_500/"
path_01_100 = "01_100/"
path_0001_100 = "0001_100/"
# NOTE(review): this directory name does not follow the pattern of the others
# (presumably it stands for 1e-05) -- confirm it matches the folder on disk.
path_00001_100 = "-05_100/"

# Divisor used when turning the standard deviation into a standard error.
NUM_RUNS = 3
# Exclusive upper bound of the episode axis.
NUM_EPISODES = 25000
# X-axis values for the plots: 0, 20, 40, ... up to (but excluding)
# NUM_EPISODES.
episodes_indices = list(range(0, NUM_EPISODES, 20))

def _load_reward_stats(config_dir):
    """Load one configuration's saved rewards and summarise them.

    Reads ``iql_running_reward_argmax.npy`` from ``RESULT_PATH + config_dir``,
    reduces it along axis 0 (presumably the independent runs -- confirm against
    the script that saved the file) and prints the overall average of the
    resulting mean curve.

    Returns a ``(rewards, mean, std_err)`` tuple where ``mean`` is the mean
    along axis 0 and ``std_err`` is the standard deviation along axis 0
    divided by ``sqrt(NUM_RUNS)``.
    """
    rewards = np.load(RESULT_PATH + config_dir + "iql_running_reward_argmax.npy")
    mean_curve = rewards.mean(axis=0)
    std_err_curve = rewards.std(axis=0) / np.sqrt(NUM_RUNS)
    print(mean_curve.mean())
    return rewards, mean_curve, std_err_curve


# One (raw rewards, mean curve, standard-error curve) triple per sweep
# configuration.  The "*_std_*" names hold the standard error, i.e. the
# standard deviation already divided by sqrt(NUM_RUNS).
(iql_reward_001_50,
 iql_reward_mean_001_50,
 iql_reward_std_001_50) = _load_reward_stats(path_001_50)

(iql_reward_001_100,
 iql_reward_mean_001_100,
 iql_reward_std_001_100) = _load_reward_stats(path_001_100)

(iql_reward_001_200,
 iql_reward_mean_001_200,
 iql_reward_std_001_200) = _load_reward_stats(path_001_200)

(iql_reward_001_500,
 iql_reward_mean_001_500,
 iql_reward_std_001_500) = _load_reward_stats(path_001_500)

(iql_reward_01_100,
 iql_reward_mean_01_100,
 iql_reward_std_01_100) = _load_reward_stats(path_01_100)

(iql_reward_0001_100,
 iql_reward_mean_0001_100,
 iql_reward_std_0001_100) = _load_reward_stats(path_0001_100)

(iql_reward_00001_100,
 iql_reward_mean_00001_100,
 iql_reward_std_00001_100) = _load_reward_stats(path_00001_100)

# Hex colour codes used for the sweep curves.
CB91_Blue = '#2CBDFE'
CB91_Green = '#47DBCD'
CB91_Pink = '#F3A0F2'
CB91_Purple = '#9D2EC5'
CB91_Violet = '#661D98'
CB91_Amber = '#F5B14C'
# Extra colour so that the palette has seven entries.
seventh_color = "#4b97ec"

# Palette in the order in which it is indexed by the plotting code below.
color_list = [
    CB91_Blue,
    CB91_Pink,
    CB91_Green,
    CB91_Amber,
    CB91_Purple,
    CB91_Violet,
    seventh_color,
]

# Plot the mean running-reward curve of every sweep configuration, with a
# shaded band of +/- the matching "*_std_*" array around it (those arrays hold
# the standard deviation divided by sqrt(NUM_RUNS), i.e. a standard error).
#
# Each legend label is listed together with its own mean and spread so that a
# band can never be drawn from another configuration's values.
sweep_curves = [
    ("IQL_001_50", iql_reward_mean_001_50, iql_reward_std_001_50),
    ("IQL_001_100", iql_reward_mean_001_100, iql_reward_std_001_100),
    ("IQL_001_200", iql_reward_mean_001_200, iql_reward_std_001_200),
    ("IQL_001_500", iql_reward_mean_001_500, iql_reward_std_001_500),
    ("IQL_01_100", iql_reward_mean_01_100, iql_reward_std_01_100),
    ("IQL_0001_100", iql_reward_mean_0001_100, iql_reward_std_0001_100),
    ("IQL_00001_100", iql_reward_mean_00001_100, iql_reward_std_00001_100),
]

# Curves and colours are paired by position: the i-th curve uses color_list[i].
for (curve_label, curve_mean, curve_spread), curve_color in zip(sweep_curves, color_list):
    # Drop length-1 axes once so that the line and the band are built from
    # arrays of the same one-dimensional shape.
    curve_mean = curve_mean.squeeze()
    curve_spread = curve_spread.squeeze()

    plt.plot(episodes_indices, curve_mean, label=curve_label, color=curve_color)
    plt.fill_between(
        episodes_indices,
        curve_mean - curve_spread,
        curve_mean + curve_spread,
        facecolor=curve_color,
        alpha=0.3,
    )

# Hide the top and right borders of the axes.
ax = plt.gca()
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

plt.legend()
plt.ylabel("Task Running Reward")
plt.xlabel("Episodes")
plt.title("Hyperparameters Sweep using IQL")
plt.show()

# iql_reward_ir = np.load(RESULT_PATH + "iql_running_reward_ir_argmax.npy")
# iql_reward_ir_mean = np.mean(iql_reward_ir, axis = 0)
# print(np.mean(iql_reward_ir_mean))
#
# iql_reward = np.load(RESULT_PATH + "iql_running_reward_argmax.npy")
# iql_reward_mean = np.mean(iql_reward, axis = 0)
# print(np.mean(iql_reward_mean))
#
# obl_reward_mi = np.load(RESULT_PATH + "obl_running_reward_mi_log2_argmax.npy")
# obl_reward_mi_mean = np.mean(obl_reward_mi, axis = 0)
# print(np.mean(obl_reward_mi_mean))
#
# obl_reward = np.load(RESULT_PATH + "obl_running_reward_argmax.npy")
# obl_reward_mean = np.mean(obl_reward, axis = 0)
# print(np.mean(obl_reward_mean))
#
# # Plot Mean
# plt.plot(episodes_indices, obl_reward_mi_mean.squeeze(), label = "OBL with MI reward")
# plt.plot(episodes_indices, obl_reward_mean.squeeze(), label = "OBL")
# plt.plot(episodes_indices, iql_reward_ir_mean.squeeze(), label = "IQL with intermediate reward")
# plt.plot(episodes_indices, iql_reward_mean.squeeze(), label = "IQL")
# plt.legend()
# plt.ylabel("Task Reward")
# plt.xlabel("Episodes")
# plt.show()
